#
###################################################################################################
# Survey Experience and Nonresponse in an Online Probability Panel: A Survival Analysis 
###################################################################################################
#
#### ANALYSIS #### 
#
# Set working directory to the project root directory
#
#### Table S1: Survey response at each survey invitation ####

library(questionr)
library(tidyverse)
library(dplyr)
#
data4_stacklong <- readRDS("data4_stacklong.rds")
#
# ind_sequence as a factor
data4_stacklong <- data4_stacklong %>%
  mutate(ind_sequence.f = factor(ind_sequence))

# Summarise counts for each time period
life_table <- data4_stacklong %>%
  group_by(Time_Period = ind_sequence.f) %>%
  summarise(
    Total_Cases = n(),
    Y_0_Response = sum(y == 0),
    Y_1_Nonresponse = sum(y == 1),
    .groups = "drop"
  ) %>%
  # Add a total row
  bind_rows(
    summarise(., 
              Time_Period = "Total",
              Total_Cases = sum(Total_Cases),
              Y_0_Response = sum(Y_0_Response),
              Y_1_Nonresponse = sum(Y_1_Nonresponse))
  )

# Print the table
life_table

#### Figure 1: Overall hazard of first nonresponse (with 95% confidence intervals), estimated from a model without explanatory variables.  ####
library(marginaleffects)
library(ggplot2)
# 
m1y <- glm(y ~ ind_sequence.f, data = data4_stacklong, family = binomial)

library(broom)
library(openxlsx)

# Tidy up the model summaries - with p-values
tidy_m1y <- tidy(m1y) %>%
  mutate(
    estimate  = round(estimate, 3),    
    std_error = round(std.error, 3),   
    p_value   = round(p.value, 3)     
  ) %>%
  select(term, estimate, std_error, p_value)

# Write to Excel
write.xlsx(list(Model_Imp = tidy_m1y), "tidy_m1y.xlsx")

new_data <- data.frame(ind_sequence.f = factor(levels(data4_stacklong$ind_sequence.f),
                                               levels = levels(data4_stacklong$ind_sequence.f)))

# Get predicted probabilities for each level of ind_sequence.f
pred <- predictions(m1y, newdata = new_data, type = "response")

# Add predicted probabilities and confidence intervals to new_data
plot_data <- new_data %>%
  mutate(hazard_prob = pred$estimate, 
         lower_ci = pred$conf.low, 
         upper_ci = pred$conf.high) %>%
  filter(as.numeric(as.character(ind_sequence.f)) <= 20)  # Ensure only levels 1 to 20

# Plot the predictions with confidence intervals
ggplot(plot_data, aes(x = as.numeric(as.character(ind_sequence.f)), y = hazard_prob)) +
  geom_line(color = "grey50", linewidth = 0.75) +  # Line plot in grey
  geom_point(color = "black", size = 1.5) +  # Points
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.2, color = "black") +  # Error bars
  geom_text(aes(label = sprintf("%.2f", hazard_prob)), 
            hjust = -0.2, vjust = -0.5, size = 3.25) +  # Display hazard probabilities as labels
  labs(x = "Survey Invitations", y = "Hazard Probability") +  # Axis labels
  scale_x_continuous(breaks = 1:20) +  # Show levels 1 to 20 on x-axis
  theme_minimal() +  
  theme(axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12))

#### Data imputation preparation ####

data_stata_short <- data4_stacklong %>%
  select(InternalSerialNumber, ind_sequence,
         dummynotenjoy, lead_nonresponse, 
         nonresponse, phonemode, nooutlminutes, median, n_invites, 
         days_since_recruit_66to99, days_since_recruit_100to241, days_since_recruit_over241, 
         abos,
         RS_Weight, weight_stratum2, weight_stratum3, weight_stratum4, weight_stratum5,
         age25_34, age35_44, age45_54, age55_64, age65_74, age75plus, Male, White, UKborn, RS_Degree,
         housing_tenure, RS_WorkingStatus_Binary, RS_CohabitationStatus_Binary, 
         NE_England, NW_England, Yorkshire_Humberside, E_Midlands, W_Midlands, Eastof_England, SE_England, SW_England, NI, Scotland, Wales, SF_IMDDecile,
         follows_politics,
         RS_BFI_2_XS_ExtraversionScore, RS_BFI_2_XS_AgreeablenessScore, RS_BFI_2_XS_ConscientiousnessScore, RS_BFI_2_XS_NegativeEmotionalityScore, RS_BFI_2_XS_OpenMindednessScore,
         w18_dummy, w20_dummy, w22_dummy, w23_dummy, w23w2_dummy, w23w3_dummy
  )
#
# Rename the variable
data_stata_short <- data_stata_short %>% 
  rename(RS_BFI_2_XS_Conscient = RS_BFI_2_XS_ConscientiousnessScore)

# Rename the variable
data_stata_short <- data_stata_short %>% 
  rename(RS_BFI_2_XS_NegativeEmotion = RS_BFI_2_XS_NegativeEmotionalityScore)

saveRDS(data_stata_short, "data_stata_short.rds")
# Save the dataset in Stata format
library(haven)
write_dta(data_stata_short, "data_stata_short.dta")
#
#### Step in Stata #### 
# Next step is completed in Stata to calculate predicted probabilities for the missing 'enjoyed' variable 
# Run 'Stata do file'
# Predicted probabilities are saved as 'muhat' in csv file 'predicted_probabilities' in the working directory
# Run the Stata code in Stata, then return to R and continue with the code below
#
##### Add muhat from Stata to data4stacklong  ####
#
predicted_probabilities <- read_csv("predicted_probabilities.csv", show_col_types = FALSE) %>%
  select(InternalSerialNumber, ind_sequence, muhat)

data4_stacklong_muhat <- merge(data4_stacklong, predicted_probabilities, by = c("InternalSerialNumber", "ind_sequence"), all.x = TRUE)

# Arrange the dataset
data4_stacklong_muhat <- data4_stacklong_muhat %>%
  arrange(InternalSerialNumber, ind_sequence)
#
saveRDS(data4_stacklong_muhat, "data4_stacklong_muhat.rds")
#
#### Data imputation ####
data4_stacklong_muhat <- readRDS("data4_stacklong_muhat.rds")
data4_stacklong_muhat$ind_sequence.f <- factor(data4_stacklong_muhat$ind_sequence)

# Create dummy variables for each level of ind_sequence using a loop
for(i in 1:23) {
  data4_stacklong_muhat <- data4_stacklong_muhat %>%
    mutate(!!paste0("ind_sequence.f", i) := ifelse(ind_sequence == i, 1, 0))
}

library(mice)
# creating n.imputations imputed datasets
n.imputations <- 50
data.imputed <- vector("list",n.imputations)
ind.missing <- is.na(data4_stacklong_muhat$dummynotenjoy)
n.missing <- sum(ind.missing)
muhat.dummynotenjoy.impute <- data4_stacklong_muhat$muhat[ind.missing]

for(i in seq(n.imputations)){
  data.i <- data4_stacklong_muhat
  data.i$dummynotenjoy[ind.missing] <- rbinom(n.missing,1,muhat.dummynotenjoy.impute)
  data.imputed[[i]] <- data.i
}

# re-calculate prev_dummynotenjoy based on imputed dummynotenjoy
# Create a function to update prev_dummynotenjoy
update_prev_dummynotenjoy <- function(data.imputed) {
  # Arrange data by InternalSerialNumber and ind_sequence
  data.imputed <- data.imputed %>% 
    arrange(InternalSerialNumber, ind_sequence)
  
  # Replace NA values in dummynotenjoy with 99
  data.imputed <- data.imputed %>%
    mutate(dummynotenjoy = ifelse(is.na(dummynotenjoy), 99, dummynotenjoy))
  
  # Create prev_dummynotenjoy
  data.imputed <- data.imputed %>%
    group_by(InternalSerialNumber) %>%
    mutate(prev_dummynotenjoy = lag(dummynotenjoy))
  
  # Replace all 99 values with NA in dummynotenjoy
  data.imputed <- data.imputed %>%
    mutate(dummynotenjoy = ifelse(dummynotenjoy == 99, NA_real_, dummynotenjoy))
  
  # Replace all 99 values with NA in prev_dummynotenjoy
  data.imputed <- data.imputed %>%
    mutate(prev_dummynotenjoy = ifelse(prev_dummynotenjoy == 99, NA_real_, prev_dummynotenjoy))
  
  return(data.imputed)
}

# Update each imputed dataset
data.imputed_updated <- lapply(data.imputed, FUN = update_prev_dummynotenjoy)

# re-calculate prop_lagged_dummynotenjoy_excl_last
# Create a function to update prop_lagged_dummynotenjoy_excl_last
update_prop_lagged <- function(data.imputed_updated) {
  data.imputed_updated <- data.imputed_updated %>%
    arrange(InternalSerialNumber, invitedate) %>%
    group_by(InternalSerialNumber) %>%
    mutate(
      count_prev_dummynotenjoy = cumsum(!is.na(prev_dummynotenjoy) & prev_dummynotenjoy %in% c(0, 1)),
      sum_prev_dummynotenjoy = cumsum(ifelse(!is.na(prev_dummynotenjoy) & prev_dummynotenjoy %in% c(1), prev_dummynotenjoy, 0)),
      prop_lagged_dummynotenjoy = ifelse(count_prev_dummynotenjoy > 0, round(sum_prev_dummynotenjoy / count_prev_dummynotenjoy, 2), NA),
      prop_lagged_dummynotenjoy_excl_last = lag(prop_lagged_dummynotenjoy)
    )
  
  return(data.imputed_updated)
}
# Update each imputed dataset
data.imputed_updated <- lapply(data.imputed_updated, FUN = update_prop_lagged)

# recode prop_lagged_dummynotenjoy_excl_last so that where a case has ind_sequence 2, then it is coded 0. 
mutate_prop_lag <- function(data.imputed_updated) {
  data.imputed_updated <- data.imputed_updated %>%
    mutate(prop_lag_dummynotenj_new = ifelse(ind_sequence == 2, 0, prop_lagged_dummynotenjoy_excl_last))
  return(data.imputed_updated)
}
# Apply the function to each data frame in the list
data.imputed_updated <- lapply(data.imputed_updated, mutate_prop_lag)

# save
saveRDS(data.imputed_updated, file = "data.imputed_updated.rds")

#### Get final pooled coefficients ####
# 
#### Main effects model (Table 2 (columns 2, 3, 4) and Table S2 (columns 2, 3, 4)) ####
#
m1 <- formula(y ~ ind_sequence.f + prev_dummynotenjoy
              + prop_lag_dummynotenj_new:third_or_later_invite
              + days_since_recruit_66to99 + days_since_recruit_100to241 + days_since_recruit_over241 
              + days_since_lastinvite_30to68 + days_since_lastinvite_69to112 + days_since_lastinvite_over112 
              + prev_phonemode
              + abos
              + prev_nooutlminutes
              + nooutlmedian_prev_survey
              + age25_34 + age35_44 + age45_54 + age55_64 + age65_74 + age75plus + Male + disability + White + UKborn + RS_Degree
              + housing_tenure + RS_WorkingStatus_Binary + RS_CohabitationStatus_Binary                              
              + NE_England + NW_England + Yorkshire_Humberside + E_Midlands + W_Midlands + Eastof_England + SE_England + SW_England + NI
              + Scotland + Wales + SF_IMDDecile
              + follows_politics
              + RS_BFI_2_XS_AgreeablenessScore + RS_BFI_2_XS_ConscientiousnessScore
              + RS_BFI_2_XS_ExtraversionScore + RS_BFI_2_XS_NegativeEmotionalityScore + RS_BFI_2_XS_OpenMindednessScore
)

m1fits <- vector("list", length(data.imputed_updated))
for (i in seq_along(m1fits)) {
  m1fits[[i]] <- glm(m1, family = binomial(), data = data.imputed_updated[[i]])
}

# 'fits' includes the results from the 50 imputations
library(mice)
# Using functions from the mice package to pool the results  
m1fit.res <- pool(as.mira(m1fits))  
# summary(m1fit.res)
estimates <- summary(m1fit.res)$estimate

# replace the coefficients from first imputed dataset with the pooled coefficients
m1fits[[1]]$coefficients[] <- estimates
# Save the modified first imputed dataset as fits.1.res
m1fits.1 <- m1fits[[1]]
saveRDS(m1fits.1, file = "m1fits.1.res.rds")

# Excel output of final model coefficients

# Tidy up the model summaries - with p-values
tidy_m1fit.res <- tidy(m1fit.res) %>%
  mutate(
    estimate  = round(estimate, 3),    
    std_error = round(std.error, 3),   
    p_value   = round(p.value, 3)     
  ) %>%
  select(term, estimate, std_error, p_value)

# Write to Excel
write.xlsx(list(Model_Imp = tidy_m1fit.res), "tidy_m1.xlsx")
#
#### Interactions effects model (Table 2 (columns 5, 6, 7) and Table S2 (columns 5, 6, 7)) ####

m1i <- formula(y ~ ind_sequence.f + prev_dummynotenjoy + ind_sequence:prev_dummynotenjoy
               + prop_lag_dummynotenj_new:third_or_later_invite
               + days_since_recruit_66to99 + days_since_recruit_100to241 + days_since_recruit_over241 
               + days_since_lastinvite_30to68 + days_since_lastinvite_69to112 + days_since_lastinvite_over112 
               + prev_phonemode + ind_sequence:prev_phonemode     
               + abos
               + prev_nooutlminutes
               + nooutlmedian_prev_survey + ind_sequence:nooutlmedian_prev_survey
               + age25_34 + age35_44 + age45_54 + age55_64 + age65_74 + age75plus + Male + disability + White + UKborn + RS_Degree
               + housing_tenure + RS_WorkingStatus_Binary + RS_CohabitationStatus_Binary                              
               + NE_England + NW_England + Yorkshire_Humberside + E_Midlands + W_Midlands + Eastof_England + SE_England + SW_England + NI
               + Scotland + Wales + SF_IMDDecile
               + follows_politics
               + RS_BFI_2_XS_AgreeablenessScore + RS_BFI_2_XS_ConscientiousnessScore
               + RS_BFI_2_XS_ExtraversionScore + RS_BFI_2_XS_NegativeEmotionalityScore + RS_BFI_2_XS_OpenMindednessScore
               + ind_sequence:RS_BFI_2_XS_AgreeablenessScore
               + ind_sequence:RS_BFI_2_XS_ConscientiousnessScore
               + ind_sequence:RS_BFI_2_XS_ExtraversionScore
               + ind_sequence:RS_BFI_2_XS_NegativeEmotionalityScore
               + ind_sequence:RS_BFI_2_XS_OpenMindednessScore
)

m1ifits <- vector("list", length(data.imputed_updated))
for (i in seq_along(m1ifits)) {
  m1ifits[[i]] <- glm(m1i, family = binomial(), data = data.imputed_updated[[i]])
}

# 'fits' includes the results from the 50 imputations
# Using functions from the mice package to pool the results  
m1ifit.res <- pool(as.mira(m1ifits))  
# summary(m1ifit.res)
estimates <- summary(m1ifit.res)$estimate

# replace the coefficients from first imputed dataset with the pooled coefficients
m1ifits[[1]]$coefficients[] <- estimates
# Save the modified first imputed dataset as fits.1.res
m1ifits.1 <- m1ifits[[1]]
saveRDS(m1ifits.1, file = "m1ifits.1.res.rds")

# Excel output of final model coefficients
tidy_m1ifit.res <- tidy(m1ifit.res) %>%
  mutate(
    estimate   = round(estimate, 3),      
    std_error  = round(std.error, 3),      
    p_value    = round(p.value, 3)        
  ) %>%
  select(term, estimate, std_error, p_value)  # Keep only these columns

# print(tidy_m1ifit.res)
# Write to Excel
write.xlsx(list(Model_Imp = tidy_m1ifit.res), "tidy_m1i.xlsx")

#### Create a dataset for marginal effects and predicted probabilities ####
#
data.imputed_updated <- readRDS("data.imputed_updated.rds")
fits.1i.res <- readRDS("m1ifits.1.res.rds")

# Extract the first imputed dataset
first_dataset <- data.imputed_updated[[1]]
#names(first_dataset)

# Get an average for prop_lag_dummynotenj_new and add to first_dataset
prop_lag_dummynotenj_new.av <- first_dataset$prop_lag_dummynotenj_new

for(i in 2:length(data.imputed_updated)){
  prop_lag_dummynotenj_new.av <- prop_lag_dummynotenj_new.av + data.imputed_updated[[i]]$prop_lag_dummynotenj_new
}
first_dataset$prop_lag_dummynotenj_new <- prop_lag_dummynotenj_new.av/length(data.imputed_updated)

first_dataset <- first_dataset[first_dataset$ind_sequence.f %in% 2:23, ] 
#
####  Plot 2 ####
library(scales)

# Define all predictors and their values, plus display labels
predictors <- list(
  prev_phonemode = list(values = c(1,0), labels = c("Previous phone survey", "Previous online survey")),
  nooutlmedian_prev_survey = list(values = c(8,15,27), labels = c("Previous average survey length: 8 mins", 
                                                            "Previous average survey length: 15 mins", 
                                                            "Previous average survey length: 27 mins")),
  prev_nooutlminutes = list(values = c(8,15,27), labels = c("Previous individual survey length: 8 mins", 
                                                         "Previous individual survey length: 15 mins", 
                                                         "Previous individual survey length: 27 mins")),
  days_since_lastinvite_under30 = list(values = 1, labels = "Under 30 days since last survey invite"),
  days_since_lastinvite_30to68 = list(values = 1, labels = "30-68 days since last survey invite"),
  days_since_lastinvite_69to112 = list(values = 1, labels = "69-112 days since last survey invite"),
  days_since_lastinvite_over112 = list(values = 1, labels = "Over 112 days since last survey invite"),
  days_since_recruit_under66 = list(values = 1, labels = "Under 66 days since recruitment"),
  days_since_recruit_66to99 = list(values = 1, labels = "66-99 days since recruitment"),
  days_since_recruit_100to241 = list(values = 1, labels = "100-241 days since recruitment"),
  days_since_recruit_over241 = list(values = 1, labels = "Over 241 days since recruitment"),
  prev_dummynotenjoy = list(values = c(0,1), labels = c("Enjoyed previous survey", "Did not enjoy previous survey")),
  RS_BFI_2_XS_AgreeablenessScore = list(values = c(1,5), labels = c("Agreeableness (Low)", "Agreeableness (High)")),
  RS_BFI_2_XS_ConscientiousnessScore = list(values = c(1,5), labels = c("Conscientiousness (Low)", "Conscientiousness (High)")),
  RS_BFI_2_XS_ExtraversionScore = list(values = c(1,5), labels = c("Extraversion (Low)", "Extraversion (High)")),
  RS_BFI_2_XS_NegativeEmotionalityScore = list(values = c(1,5), labels = c("Negative emotionality (Low)", "Negative emotionality (High)")),
  RS_BFI_2_XS_OpenMindednessScore = list(values = c(1,5), labels = c("Open-mindedness (Low)", "Open-mindedness (High)"))
)

# Helper function for marginal predictions
get_preds <- function(var, vals, labels) {
  df <- data.frame(
    category = character(),
    predicted_probability = numeric(),
    conf_low = numeric(),
    conf_high = numeric(),
    stringsAsFactors = FALSE
  )
  
  for (i in seq_along(vals)) {
    val <- vals[i]
    
    modified_data <- first_dataset %>%
      filter(ind_sequence.f == 3) %>%
      mutate(
        # swap enjoyment coding
        prev_dummynotenjoy = ifelse(var == "prev_dummynotenjoy", ifelse(val == 0, 1, 0), prev_dummynotenjoy),
        # set variable of interest
        !!var := val,
        # set other survey-lengths to baseline
        median_prev_survey = ifelse(var == "median_prev_survey", val, 15),
        prev_newminutes = ifelse(var == "prev_newminutes", val, 15),
        days_since_lastinvite_under30 = ifelse(var == "days_since_lastinvite_under30", val, 0),
        days_since_lastinvite_30to68 = ifelse(var == "days_since_lastinvite_30to68", val, 0),
        days_since_lastinvite_69to112 = ifelse(var == "days_since_lastinvite_69to112", val, 0),
        days_since_lastinvite_over112 = ifelse(var == "days_since_lastinvite_over112", val, 0),
        days_since_recruit_under66 = ifelse(var == "days_since_recruit_under66", val, 0),
        days_since_recruit_66to99 = ifelse(var == "days_since_recruit_66to99", val, 0),
        days_since_recruit_100to241 = ifelse(var == "days_since_recruit_100to241", val, 0),
        days_since_recruit_over241 = ifelse(var == "days_since_recruit_over241", val, 0)
      )
    
    pred <- avg_predictions(fits.1i.res, type = "response", newdata = modified_data)
    
    df <- df %>%
      add_row(
        category = labels[i],
        predicted_probability = pred$estimate,
        conf_low = pred$conf.low,
        conf_high = pred$conf.high
      )
  }
  return(df)
}

# Loop over all predictors
predicted_probs_df <- do.call(rbind, lapply(names(predictors), function(v) {
  get_preds(v, predictors[[v]]$values, predictors[[v]]$labels)
}))

# Factor for vertical order
predicted_probs_df$category <- factor(predicted_probs_df$category, levels = rev(unlist(lapply(predictors, `[[`, "labels"))))

# Plot
ggplot(predicted_probs_df, aes(x = predicted_probability, y = category)) +
  geom_point(size = 2, color = "black") +
  geom_errorbarh(aes(xmin = conf_low, xmax = conf_high), height = 0.2, color = "black") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  scale_x_continuous(
    limits = c(0, 0.45),
    breaks = seq(0, 0.45, 0.05),
    labels = number_format(accuracy = 0.01)
  ) +
  labs(
    x = "Predicted probability of first nonresponse at survey invitation 3",
    y = NULL
  ) +
  theme_minimal(base_size = 10) +
  theme(
    axis.text.y = element_text(size = 9),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(margin = margin(t = 8))
  )


#### Figure 3, probabilities for prev_phonemode by ind_sequence ####
#
first_dataset$prev_phonemode <- 1 # Previous phone mode
first_dataset$pred <- predict(fits.1i.res, type = "response", newdata = first_dataset)
# Do the averaging using the method above
pr1.tmp <- data.frame(time=unique(first_dataset$ind_sequence),
                      pr=unique(ave(first_dataset$pred,first_dataset$ind_sequence,FUN=function(x){mean(x,na.rm=T)}))                   
)
first_dataset$prev_phonemode <- 0 # Previous online mode
first_dataset$pred <- predict(fits.1i.res, type = "response", newdata = first_dataset)
# Do the averaging using the method above
pr0.tmp <- data.frame(time=unique(first_dataset$ind_sequence),
                      pr=unique(ave(first_dataset$pred,first_dataset$ind_sequence,FUN=function(x){mean(x,na.rm=T)}))                   
)

options(scipen = 999)
# print(pr1.tmp)
# print(pr0.tmp)

# Plot the first line for prev_phonemode = 1
plot(2:23, pr1.tmp$pr, type = "l", lwd = 2, col = "black", 
     xlab = "Survey invitations", ylab = "Hazard of first nonresponse",
     main = "",
     xlim = c(2, 16), ylim = c(0, 0.75))

# Add the second line for prev_phonemode = 0
lines(2:23, pr0.tmp$pr, type = "l", lwd = 2, col = "black", lty = 2)  

# Add x-axis tick marks for every value from 3 to 23
axis(1, at = 2:23)

# Add legend
legend("topright", 
       legend = c("Phone","Online"), 
       col = c("black", "black"), 
       lty = c(1, 2, 3), 
       lwd = 2)

#### Table 3, marginal effects at survey invitation 3 ####

#### Enjoyed ####
## Step done prior to each plot with fitted values ##

calculate_marginal_effects <- function(dataset, days_var, median_q, minutes_q) {
  filtered_data <- dataset %>%
    filter(
      ind_sequence.f == 3,
      prev_dummynotenjoy == 0,    
      !!sym(days_var) == 1,
      !!sym(median_q) == 1,
      !!sym(minutes_q) == 1
    )
  
  filtered_data$prev_phonemode <- 0
  pr0 <- mean(predict(fits.1i.res, type = "response", newdata = filtered_data), na.rm = TRUE)
  
  filtered_data$prev_phonemode <- 1
  pr1 <- mean(predict(fits.1i.res, type = "response", newdata = filtered_data), na.rm = TRUE)
  
  options(scipen = 999)
  print(pr0)
  print(pr1)
}

# Run for different conditions
calculate_marginal_effects(first_dataset, "days_since_lastinvite_4to29", "median_prev_survey_q3", "prev_nooutlminutes_q5")
calculate_marginal_effects(first_dataset, "days_since_lastinvite_4to29", "median_prev_survey_q5", "prev_nooutlminutes_q5")
calculate_marginal_effects(first_dataset, "days_since_lastinvite_4to29", "median_prev_survey_q5", "prev_nooutlminutes_q3")
calculate_marginal_effects(first_dataset, "days_since_lastinvite_over112", "median_prev_survey_q3", "prev_nooutlminutes_q5")
calculate_marginal_effects(first_dataset, "days_since_lastinvite_over112", "median_prev_survey_q5", "prev_nooutlminutes_q5")
calculate_marginal_effects(first_dataset, "days_since_lastinvite_over112", "median_prev_survey_q5", "prev_nooutlminutes_q3")

#### Did not enjoy ####
## Step done prior to each plot with fitted values ##

# Function to calculate marginal effects
calculate_marginal_effects <- function(dataset, days_var, median_q, minutes_q) {
  filtered_data <- dataset %>%
    filter(
      ind_sequence.f == 3,
      prev_dummynotenjoy == 1,    
      !!sym(days_var) == 1,
      !!sym(median_q) == 1,
      !!sym(minutes_q) == 1
    )
  
  filtered_data$prev_phonemode <- 0
  pr0 <- mean(predict(fits.1i.res, type = "response", newdata = filtered_data), na.rm = TRUE)
  
  filtered_data$prev_phonemode <- 1
  pr1 <- mean(predict(fits.1i.res, type = "response", newdata = filtered_data), na.rm = TRUE)
  
  options(scipen = 999)
  print(pr0)
  print(pr1)
  
}

# Run for different conditions
calculate_marginal_effects(first_dataset, "days_since_lastinvite_4to29", "median_prev_survey_q3", "prev_nooutlminutes_q5")
calculate_marginal_effects(first_dataset, "days_since_lastinvite_4to29", "median_prev_survey_q5", "prev_nooutlminutes_q5")
calculate_marginal_effects(first_dataset, "days_since_lastinvite_4to29", "median_prev_survey_q5", "prev_nooutlminutes_q3")
calculate_marginal_effects(first_dataset, "days_since_lastinvite_over112", "median_prev_survey_q3", "prev_nooutlminutes_q5")
calculate_marginal_effects(first_dataset, "days_since_lastinvite_over112", "median_prev_survey_q5", "prev_nooutlminutes_q5")
calculate_marginal_effects(first_dataset, "days_since_lastinvite_over112", "median_prev_survey_q5", "prev_nooutlminutes_q3")
